{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Online Retail workbook into a DataFrame.\n",
    "# NOTE(review): absolute, user-specific path; adjust when running elsewhere.\n",
    "data = pd.read_excel(io='/home/awantik/Downloads/Online Retail.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frequent-itemset mining (apriori) and rule generation (association_rules).\n",
    "# The import list must not end in a bare comma: outside parentheses that is a SyntaxError.\n",
    "from mlxtend.frequent_patterns import apriori, association_rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 541909 entries, 0 to 541908\n",
      "Data columns (total 8 columns):\n",
      "InvoiceNo      541909 non-null object\n",
      "StockCode      541909 non-null object\n",
      "Description    540455 non-null object\n",
      "Quantity       541909 non-null int64\n",
      "InvoiceDate    541909 non-null datetime64[ns]\n",
      "UnitPrice      541909 non-null float64\n",
      "CustomerID     406829 non-null float64\n",
      "Country        541909 non-null object\n",
      "dtypes: datetime64[ns](1), float64(2), int64(1), object(4)\n",
      "memory usage: 33.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>StockCode</th>\n",
       "      <th>Description</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536365</td>\n",
       "      <td>85123A</td>\n",
       "      <td>WHITE HANGING HEART T-LIGHT HOLDER</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536365</td>\n",
       "      <td>71053</td>\n",
       "      <td>WHITE METAL LANTERN</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536365</td>\n",
       "      <td>84406B</td>\n",
       "      <td>CREAM CUPID HEARTS COAT HANGER</td>\n",
       "      <td>8</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.75</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029G</td>\n",
       "      <td>KNITTED UNION FLAG HOT WATER BOTTLE</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029E</td>\n",
       "      <td>RED WOOLLY HOTTIE WHITE HEART.</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  InvoiceNo StockCode                          Description  Quantity  \\\n",
       "0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   \n",
       "1    536365     71053                  WHITE METAL LANTERN         6   \n",
       "2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   \n",
       "3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   \n",
       "4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   \n",
       "\n",
       "          InvoiceDate  UnitPrice  CustomerID         Country  \n",
       "0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  \n",
       "1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  \n",
       "3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "United Kingdom          495478\n",
       "Germany                   9495\n",
       "France                    8557\n",
       "EIRE                      8196\n",
       "Spain                     2533\n",
       "Netherlands               2371\n",
       "Belgium                   2069\n",
       "Switzerland               2002\n",
       "Portugal                  1519\n",
       "Australia                 1259\n",
       "Norway                    1086\n",
       "Italy                      803\n",
       "Channel Islands            758\n",
       "Finland                    695\n",
       "Cyprus                     622\n",
       "Sweden                     462\n",
       "Unspecified                446\n",
       "Austria                    401\n",
       "Denmark                    389\n",
       "Japan                      358\n",
       "Poland                     341\n",
       "Israel                     297\n",
       "USA                        291\n",
       "Hong Kong                  288\n",
       "Singapore                  229\n",
       "Iceland                    182\n",
       "Canada                     151\n",
       "Greece                     146\n",
       "Malta                      127\n",
       "United Arab Emirates        68\n",
       "European Community          61\n",
       "RSA                         58\n",
       "Lebanon                     45\n",
       "Lithuania                   35\n",
       "Brazil                      32\n",
       "Czech Republic              30\n",
       "Bahrain                     19\n",
       "Saudi Arabia                10\n",
       "Name: Country, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.Country.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Identify the number of customers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4372"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count distinct customers. nunique() ignores missing CustomerID values,\n",
    "# whereas len(unique()) counted NaN as one extra customer.\n",
    "data.CustomerID.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Identify the customer with the highest total purchases and the amount"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Line total for each row: quantity times unit price\n",
    "data['TotalPrice'] = data['Quantity'].mul(data['UnitPrice'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum of TotalPrice per customer\n",
    "res = data.groupby('CustomerID')['TotalPrice'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CustomerID\n",
       "14646.0    279489.02\n",
       "18102.0    256438.49\n",
       "17450.0    187482.17\n",
       "14911.0    132572.62\n",
       "12415.0    123725.45\n",
       "             ...    \n",
       "12503.0     -1126.00\n",
       "17603.0     -1165.30\n",
       "14213.0     -1192.20\n",
       "15369.0     -1592.49\n",
       "17448.0     -4287.63\n",
       "Name: TotalPrice, Length: 4372, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res.sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 541909 entries, 0 to 541908\n",
      "Data columns (total 9 columns):\n",
      "InvoiceNo      541909 non-null object\n",
      "StockCode      541909 non-null object\n",
      "Description    540455 non-null object\n",
      "Quantity       541909 non-null int64\n",
      "InvoiceDate    541909 non-null datetime64[ns]\n",
      "UnitPrice      541909 non-null float64\n",
      "CustomerID     406829 non-null float64\n",
      "Country        541909 non-null object\n",
      "TotalPrice     541909 non-null float64\n",
      "dtypes: datetime64[ns](1), float64(3), int64(1), object(4)\n",
      "memory usage: 37.2+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Clean Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "573585     1114\n",
       "581219      749\n",
       "581492      731\n",
       "580729      721\n",
       "558475      705\n",
       "           ... \n",
       "564076        1\n",
       "564048        1\n",
       "564047        1\n",
       "564046        1\n",
       "C576576       1\n",
       "Name: InvoiceNo, Length: 25900, dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.InvoiceNo.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Finding all the credit records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>StockCode</th>\n",
       "      <th>Description</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Country</th>\n",
       "      <th>TotalPrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>C536379</td>\n",
       "      <td>D</td>\n",
       "      <td>Discount</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 09:41:00</td>\n",
       "      <td>27.50</td>\n",
       "      <td>14527.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-27.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>C536383</td>\n",
       "      <td>35004C</td>\n",
       "      <td>SET OF 3 COLOURED  FLYING DUCKS</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 09:49:00</td>\n",
       "      <td>4.65</td>\n",
       "      <td>15311.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-4.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>C536391</td>\n",
       "      <td>22556</td>\n",
       "      <td>PLASTERS IN TIN CIRCUS PARADE</td>\n",
       "      <td>-12</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-19.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>236</th>\n",
       "      <td>C536391</td>\n",
       "      <td>21984</td>\n",
       "      <td>PACK OF 12 PINK PAISLEY TISSUES</td>\n",
       "      <td>-24</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>0.29</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-6.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237</th>\n",
       "      <td>C536391</td>\n",
       "      <td>21983</td>\n",
       "      <td>PACK OF 12 BLUE PAISLEY TISSUES</td>\n",
       "      <td>-24</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>0.29</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-6.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>238</th>\n",
       "      <td>C536391</td>\n",
       "      <td>21980</td>\n",
       "      <td>PACK OF 12 RED RETROSPOT TISSUES</td>\n",
       "      <td>-24</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>0.29</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-6.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>C536391</td>\n",
       "      <td>21484</td>\n",
       "      <td>CHICK GREY HOT WATER BOTTLE</td>\n",
       "      <td>-12</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>3.45</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-41.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>C536391</td>\n",
       "      <td>22557</td>\n",
       "      <td>PLASTERS IN TIN VINTAGE PAISLEY</td>\n",
       "      <td>-12</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-19.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>241</th>\n",
       "      <td>C536391</td>\n",
       "      <td>22553</td>\n",
       "      <td>PLASTERS IN TIN SKULLS</td>\n",
       "      <td>-24</td>\n",
       "      <td>2010-12-01 10:24:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>17548.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-39.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>939</th>\n",
       "      <td>C536506</td>\n",
       "      <td>22960</td>\n",
       "      <td>JAM MAKING SET WITH JARS</td>\n",
       "      <td>-6</td>\n",
       "      <td>2010-12-01 12:38:00</td>\n",
       "      <td>4.25</td>\n",
       "      <td>17897.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-25.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441</th>\n",
       "      <td>C536543</td>\n",
       "      <td>22632</td>\n",
       "      <td>HAND WARMER RED RETROSPOT</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 14:30:00</td>\n",
       "      <td>2.10</td>\n",
       "      <td>17841.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-2.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1442</th>\n",
       "      <td>C536543</td>\n",
       "      <td>22355</td>\n",
       "      <td>CHARLOTTE BAG SUKI DESIGN</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-01 14:30:00</td>\n",
       "      <td>0.85</td>\n",
       "      <td>17841.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-1.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1973</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22244</td>\n",
       "      <td>3 HOOK HANGER MAGIC GARDEN</td>\n",
       "      <td>-4</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.95</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-7.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1974</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22242</td>\n",
       "      <td>5 HOOK HANGER MAGIC TOADSTOOL</td>\n",
       "      <td>-5</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-8.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1975</th>\n",
       "      <td>C536548</td>\n",
       "      <td>20914</td>\n",
       "      <td>SET/5 RED RETROSPOT LID GLASS BOWLS</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>2.95</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-2.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1976</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22892</td>\n",
       "      <td>SET OF SALT AND PEPPER TOADSTOOLS</td>\n",
       "      <td>-7</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.25</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1977</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22654</td>\n",
       "      <td>DELUXE SEWING KIT</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>5.95</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-5.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1978</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22767</td>\n",
       "      <td>TRIPLE PHOTO FRAME CORNICE</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>9.95</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-19.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1979</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22333</td>\n",
       "      <td>RETROSPOT PARTY BAG + STICKER SET</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-1.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1980</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22245</td>\n",
       "      <td>HOOK, 1 HANGER ,MAGIC GARDEN</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>0.85</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-1.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1981</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22077</td>\n",
       "      <td>6 RIBBONS RUSTIC CHARM</td>\n",
       "      <td>-6</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-9.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1982</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22631</td>\n",
       "      <td>CIRCUS PARADE LUNCH BOX</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.95</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-1.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1983</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22168</td>\n",
       "      <td>ORGANISER WOOD ANTIQUE WHITE</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>8.50</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-17.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1984</th>\n",
       "      <td>C536548</td>\n",
       "      <td>21218</td>\n",
       "      <td>RED SPOTTY BISCUIT TIN</td>\n",
       "      <td>-3</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>3.75</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1985</th>\n",
       "      <td>C536548</td>\n",
       "      <td>20957</td>\n",
       "      <td>PORCELAIN HANGING BELL SMALL</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>1.45</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-1.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>C536548</td>\n",
       "      <td>22580</td>\n",
       "      <td>ADVENT CALENDAR GINGHAM SACK</td>\n",
       "      <td>-4</td>\n",
       "      <td>2010-12-01 14:33:00</td>\n",
       "      <td>5.95</td>\n",
       "      <td>12472.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-23.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3170</th>\n",
       "      <td>C536606</td>\n",
       "      <td>20914</td>\n",
       "      <td>SET/5 RED RETROSPOT LID GLASS BOWLS</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-02 09:10:00</td>\n",
       "      <td>2.95</td>\n",
       "      <td>14092.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-5.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3338</th>\n",
       "      <td>C536622</td>\n",
       "      <td>22752</td>\n",
       "      <td>SET 7 BABUSHKA NESTING BOXES</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-02 10:37:00</td>\n",
       "      <td>8.50</td>\n",
       "      <td>12471.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-17.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3410</th>\n",
       "      <td>C536625</td>\n",
       "      <td>22839</td>\n",
       "      <td>3 TIER CAKE TIN GREEN AND CREAM</td>\n",
       "      <td>-2</td>\n",
       "      <td>2010-12-02 10:46:00</td>\n",
       "      <td>14.95</td>\n",
       "      <td>14766.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-29.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3693</th>\n",
       "      <td>C536642</td>\n",
       "      <td>21463</td>\n",
       "      <td>MIRRORED DISCO BALL</td>\n",
       "      <td>-1</td>\n",
       "      <td>2010-12-02 11:56:00</td>\n",
       "      <td>5.95</td>\n",
       "      <td>14390.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-5.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>538564</th>\n",
       "      <td>C581409</td>\n",
       "      <td>82482</td>\n",
       "      <td>WOODEN PICTURE FRAME WHITE FINISH</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 14:08:00</td>\n",
       "      <td>2.95</td>\n",
       "      <td>12476.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-2.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>538565</th>\n",
       "      <td>C581409</td>\n",
       "      <td>22173</td>\n",
       "      <td>METAL 4 HOOK HANGER FRENCH CHATEAU</td>\n",
       "      <td>-2</td>\n",
       "      <td>2011-12-08 14:08:00</td>\n",
       "      <td>3.29</td>\n",
       "      <td>12476.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-6.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>538566</th>\n",
       "      <td>C581409</td>\n",
       "      <td>85199L</td>\n",
       "      <td>LARGE HANGING IVORY &amp; RED WOOD BIRD</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 14:08:00</td>\n",
       "      <td>0.65</td>\n",
       "      <td>12476.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-0.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>538567</th>\n",
       "      <td>C581409</td>\n",
       "      <td>85127</td>\n",
       "      <td>SMALL SQUARE CUT GLASS CANDLESTICK</td>\n",
       "      <td>-5</td>\n",
       "      <td>2011-12-08 14:08:00</td>\n",
       "      <td>4.95</td>\n",
       "      <td>12476.0</td>\n",
       "      <td>Germany</td>\n",
       "      <td>-24.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540072</th>\n",
       "      <td>C581460</td>\n",
       "      <td>22197</td>\n",
       "      <td>POPCORN HOLDER</td>\n",
       "      <td>-5</td>\n",
       "      <td>2011-12-08 18:48:00</td>\n",
       "      <td>0.72</td>\n",
       "      <td>13078.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-3.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540073</th>\n",
       "      <td>C581460</td>\n",
       "      <td>22107</td>\n",
       "      <td>PIZZA PLATE IN BOX</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 18:48:00</td>\n",
       "      <td>1.25</td>\n",
       "      <td>13078.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-1.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540078</th>\n",
       "      <td>C581462</td>\n",
       "      <td>16219</td>\n",
       "      <td>HOUSE SHAPE PENCIL SHARPENER</td>\n",
       "      <td>-48</td>\n",
       "      <td>2011-12-08 18:51:00</td>\n",
       "      <td>0.06</td>\n",
       "      <td>12985.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-2.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540079</th>\n",
       "      <td>C581462</td>\n",
       "      <td>21642</td>\n",
       "      <td>ASSORTED TUTTI FRUTTI PEN</td>\n",
       "      <td>-72</td>\n",
       "      <td>2011-12-08 18:51:00</td>\n",
       "      <td>0.29</td>\n",
       "      <td>12985.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-20.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540080</th>\n",
       "      <td>C581463</td>\n",
       "      <td>85048</td>\n",
       "      <td>15CM CHRISTMAS GLASS BALL 20 LIGHTS</td>\n",
       "      <td>-2</td>\n",
       "      <td>2011-12-08 18:56:00</td>\n",
       "      <td>7.95</td>\n",
       "      <td>17526.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-15.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540081</th>\n",
       "      <td>C581464</td>\n",
       "      <td>23458</td>\n",
       "      <td>DOLLY CABINET 3 DRAWERS</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 18:57:00</td>\n",
       "      <td>14.95</td>\n",
       "      <td>15951.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-14.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540082</th>\n",
       "      <td>C581464</td>\n",
       "      <td>71477</td>\n",
       "      <td>COLOURED GLASS STAR T-LIGHT HOLDER</td>\n",
       "      <td>-6</td>\n",
       "      <td>2011-12-08 18:57:00</td>\n",
       "      <td>3.95</td>\n",
       "      <td>15951.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-23.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540083</th>\n",
       "      <td>C581465</td>\n",
       "      <td>23660</td>\n",
       "      <td>HENRIETTA HEN MUG</td>\n",
       "      <td>-2</td>\n",
       "      <td>2011-12-08 18:59:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>15755.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-3.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540084</th>\n",
       "      <td>C581465</td>\n",
       "      <td>22171</td>\n",
       "      <td>3 HOOK PHOTO SHELF ANTIQUE WHITE</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 18:59:00</td>\n",
       "      <td>8.50</td>\n",
       "      <td>15755.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-8.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540085</th>\n",
       "      <td>C581465</td>\n",
       "      <td>21876</td>\n",
       "      <td>POTTERING MUG</td>\n",
       "      <td>-4</td>\n",
       "      <td>2011-12-08 18:59:00</td>\n",
       "      <td>1.65</td>\n",
       "      <td>15755.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-6.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540086</th>\n",
       "      <td>C581465</td>\n",
       "      <td>20914</td>\n",
       "      <td>SET/5 RED RETROSPOT LID GLASS BOWLS</td>\n",
       "      <td>-3</td>\n",
       "      <td>2011-12-08 18:59:00</td>\n",
       "      <td>2.95</td>\n",
       "      <td>15755.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-8.85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540087</th>\n",
       "      <td>C581466</td>\n",
       "      <td>22838</td>\n",
       "      <td>3 TIER CAKE TIN RED AND CREAM</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 19:20:00</td>\n",
       "      <td>14.95</td>\n",
       "      <td>13883.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-14.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540088</th>\n",
       "      <td>C581466</td>\n",
       "      <td>22720</td>\n",
       "      <td>SET OF 3 CAKE TINS PANTRY DESIGN</td>\n",
       "      <td>-2</td>\n",
       "      <td>2011-12-08 19:20:00</td>\n",
       "      <td>4.95</td>\n",
       "      <td>13883.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-9.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540089</th>\n",
       "      <td>C581466</td>\n",
       "      <td>21216</td>\n",
       "      <td>SET 3 RETROSPOT TEA,COFFEE,SUGAR</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 19:20:00</td>\n",
       "      <td>4.95</td>\n",
       "      <td>13883.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-4.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540090</th>\n",
       "      <td>C581466</td>\n",
       "      <td>21535</td>\n",
       "      <td>RED RETROSPOT SMALL MILK JUG</td>\n",
       "      <td>-2</td>\n",
       "      <td>2011-12-08 19:20:00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>13883.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-5.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540091</th>\n",
       "      <td>C581466</td>\n",
       "      <td>21232</td>\n",
       "      <td>STRAWBERRY CERAMIC TRINKET POT</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-08 19:20:00</td>\n",
       "      <td>1.25</td>\n",
       "      <td>13883.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-1.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540141</th>\n",
       "      <td>C581468</td>\n",
       "      <td>21314</td>\n",
       "      <td>SMALL GLASS HEART TRINKET POT</td>\n",
       "      <td>-10</td>\n",
       "      <td>2011-12-08 19:26:00</td>\n",
       "      <td>2.10</td>\n",
       "      <td>13599.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-21.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540142</th>\n",
       "      <td>C581468</td>\n",
       "      <td>22098</td>\n",
       "      <td>BOUDOIR SQUARE TISSUE BOX</td>\n",
       "      <td>-12</td>\n",
       "      <td>2011-12-08 19:26:00</td>\n",
       "      <td>0.39</td>\n",
       "      <td>13599.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-4.68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540176</th>\n",
       "      <td>C581470</td>\n",
       "      <td>23084</td>\n",
       "      <td>RABBIT NIGHT LIGHT</td>\n",
       "      <td>-4</td>\n",
       "      <td>2011-12-08 19:28:00</td>\n",
       "      <td>2.08</td>\n",
       "      <td>17924.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-8.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540422</th>\n",
       "      <td>C581484</td>\n",
       "      <td>23843</td>\n",
       "      <td>PAPER CRAFT , LITTLE BIRDIE</td>\n",
       "      <td>-80995</td>\n",
       "      <td>2011-12-09 09:27:00</td>\n",
       "      <td>2.08</td>\n",
       "      <td>16446.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-168469.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540448</th>\n",
       "      <td>C581490</td>\n",
       "      <td>22178</td>\n",
       "      <td>VICTORIAN GLASS HANGING T-LIGHT</td>\n",
       "      <td>-12</td>\n",
       "      <td>2011-12-09 09:57:00</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14397.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-23.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540449</th>\n",
       "      <td>C581490</td>\n",
       "      <td>23144</td>\n",
       "      <td>ZINC T-LIGHT HOLDER STARS SMALL</td>\n",
       "      <td>-11</td>\n",
       "      <td>2011-12-09 09:57:00</td>\n",
       "      <td>0.83</td>\n",
       "      <td>14397.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-9.13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541541</th>\n",
       "      <td>C581499</td>\n",
       "      <td>M</td>\n",
       "      <td>Manual</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-09 10:28:00</td>\n",
       "      <td>224.69</td>\n",
       "      <td>15498.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-224.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541715</th>\n",
       "      <td>C581568</td>\n",
       "      <td>21258</td>\n",
       "      <td>VICTORIAN SEWING BOX LARGE</td>\n",
       "      <td>-5</td>\n",
       "      <td>2011-12-09 11:57:00</td>\n",
       "      <td>10.95</td>\n",
       "      <td>15311.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-54.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541716</th>\n",
       "      <td>C581569</td>\n",
       "      <td>84978</td>\n",
       "      <td>HANGING HEART JAR T-LIGHT HOLDER</td>\n",
       "      <td>-1</td>\n",
       "      <td>2011-12-09 11:58:00</td>\n",
       "      <td>1.25</td>\n",
       "      <td>17315.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-1.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541717</th>\n",
       "      <td>C581569</td>\n",
       "      <td>20979</td>\n",
       "      <td>36 PENCILS TUBE RED RETROSPOT</td>\n",
       "      <td>-5</td>\n",
       "      <td>2011-12-09 11:58:00</td>\n",
       "      <td>1.25</td>\n",
       "      <td>17315.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>-6.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9288 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       InvoiceNo StockCode                       Description  Quantity  \\\n",
       "141      C536379         D                          Discount        -1   \n",
       "154      C536383    35004C   SET OF 3 COLOURED  FLYING DUCKS        -1   \n",
       "235      C536391     22556    PLASTERS IN TIN CIRCUS PARADE        -12   \n",
       "236      C536391     21984  PACK OF 12 PINK PAISLEY TISSUES        -24   \n",
       "237      C536391     21983  PACK OF 12 BLUE PAISLEY TISSUES        -24   \n",
       "...          ...       ...                               ...       ...   \n",
       "540449   C581490     23144   ZINC T-LIGHT HOLDER STARS SMALL       -11   \n",
       "541541   C581499         M                            Manual        -1   \n",
       "541715   C581568     21258        VICTORIAN SEWING BOX LARGE        -5   \n",
       "541716   C581569     84978  HANGING HEART JAR T-LIGHT HOLDER        -1   \n",
       "541717   C581569     20979     36 PENCILS TUBE RED RETROSPOT        -5   \n",
       "\n",
       "               InvoiceDate  UnitPrice  CustomerID         Country  TotalPrice  \n",
       "141    2010-12-01 09:41:00      27.50     14527.0  United Kingdom      -27.50  \n",
       "154    2010-12-01 09:49:00       4.65     15311.0  United Kingdom       -4.65  \n",
       "235    2010-12-01 10:24:00       1.65     17548.0  United Kingdom      -19.80  \n",
       "236    2010-12-01 10:24:00       0.29     17548.0  United Kingdom       -6.96  \n",
       "237    2010-12-01 10:24:00       0.29     17548.0  United Kingdom       -6.96  \n",
       "...                    ...        ...         ...             ...         ...  \n",
       "540449 2011-12-09 09:57:00       0.83     14397.0  United Kingdom       -9.13  \n",
       "541541 2011-12-09 10:28:00     224.69     15498.0  United Kingdom     -224.69  \n",
       "541715 2011-12-09 11:57:00      10.95     15311.0  United Kingdom      -54.75  \n",
       "541716 2011-12-09 11:58:00       1.25     17315.0  United Kingdom       -1.25  \n",
       "541717 2011-12-09 11:58:00       1.25     17315.0  United Kingdom       -6.25  \n",
       "\n",
       "[9288 rows x 9 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data.InvoiceNo.astype('str').str.startswith('C')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows whose InvoiceNo does not start with 'C'.\n",
    "data = data.loc[~data['InvoiceNo'].astype(str).str.startswith('C')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Trim surrounding whitespace from Description.\n",
    "# assign() returns a new DataFrame, so no column is written into a filtered slice of the\n",
    "# original frame (which is what raised SettingWithCopyWarning here before).\n",
    "data = data.assign(Description=data['Description'].str.strip())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Basket Creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Subset of the rows whose Country is 'Germany'.\n",
    "data_Germany = data.loc[data['Country'] == 'Germany']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "InvoiceNo  Description                        \n",
       "536527     3 HOOK HANGER MAGIC GARDEN             12\n",
       "           5 HOOK HANGER MAGIC TOADSTOOL          12\n",
       "           5 HOOK HANGER RED MAGIC TOADSTOOL      12\n",
       "           ASSORTED COLOUR LIZARD SUCTION HOOK    24\n",
       "           CHILDREN'S CIRCUS PARADE MUG           12\n",
       "                                                  ..\n",
       "581578     SPOTTY BUNTING                          9\n",
       "           VINTAGE DONKEY TAIL GAME                6\n",
       "           WRAP ALPHABET POSTER                   25\n",
       "           WRAP CIRCUS PARADE                     25\n",
       "           WRAP RED APPLES                        25\n",
       "Name: Quantity, Length: 9015, dtype: int64"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_Germany.groupby(['InvoiceNo','Description'])['Quantity'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "WHITE HANGING HEART T-LIGHT HOLDER    2327\n",
       "JUMBO BAG RED RETROSPOT               2115\n",
       "REGENCY CAKESTAND 3 TIER              2019\n",
       "PARTY BUNTING                         1707\n",
       "LUNCH BAG RED RETROSPOT               1594\n",
       "                                      ... \n",
       "Dagamed                                  1\n",
       "PINK CRYSTAL GUITAR PHONE CHARM          1\n",
       "???lost                                  1\n",
       "mystery! Only ever imported 1800         1\n",
       "PAINTED HEART WREATH WITH BELL           1\n",
       "Name: Description, Length: 4194, dtype: int64"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.Description.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the invoice x item matrix: one row per InvoiceNo, one column per Description,\n",
    "# cells hold the summed Quantity (0 where the item is absent from the invoice).\n",
    "# NOTE(review): the result is named basket_Germany but the filter selects 'France' --\n",
    "# confirm which country is intended before interpreting the rules below.\n",
    "basket_Germany = (\n",
    "    data[data['Country'] == \"France\"]\n",
    "    .groupby(['InvoiceNo', 'Description'])['Quantity']\n",
    "    .sum()\n",
    "    .unstack()\n",
    "    .reset_index()\n",
    "    .fillna(0)\n",
    "    .set_index('InvoiceNo')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Description</th>\n",
       "      <th>10 COLOUR SPACEBOY PEN</th>\n",
       "      <th>12 COLOURED PARTY BALLOONS</th>\n",
       "      <th>12 EGG HOUSE PAINTED WOOD</th>\n",
       "      <th>12 MESSAGE CARDS WITH ENVELOPES</th>\n",
       "      <th>12 PENCIL SMALL TUBE WOODLAND</th>\n",
       "      <th>12 PENCILS SMALL TUBE RED RETROSPOT</th>\n",
       "      <th>12 PENCILS SMALL TUBE SKULL</th>\n",
       "      <th>12 PENCILS TALL TUBE POSY</th>\n",
       "      <th>12 PENCILS TALL TUBE RED RETROSPOT</th>\n",
       "      <th>12 PENCILS TALL TUBE WOODLAND</th>\n",
       "      <th>...</th>\n",
       "      <th>WRAP VINTAGE PETALS  DESIGN</th>\n",
       "      <th>YELLOW COAT RACK PARIS FASHION</th>\n",
       "      <th>YELLOW GIANT GARDEN THERMOMETER</th>\n",
       "      <th>YELLOW SHARK HELICOPTER</th>\n",
       "      <th>ZINC  STAR T-LIGHT HOLDER</th>\n",
       "      <th>ZINC FOLKART SLEIGH BELLS</th>\n",
       "      <th>ZINC HERB GARDEN CONTAINER</th>\n",
       "      <th>ZINC METAL HEART DECORATION</th>\n",
       "      <th>ZINC T-LIGHT HOLDER STAR LARGE</th>\n",
       "      <th>ZINC T-LIGHT HOLDER STARS SMALL</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>536370</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>536852</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>536974</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537065</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537463</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 1563 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Description  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS  \\\n",
       "InvoiceNo                                                         \n",
       "536370                          0.0                         0.0   \n",
       "536852                          0.0                         0.0   \n",
       "536974                          0.0                         0.0   \n",
       "537065                          0.0                         0.0   \n",
       "537463                          0.0                         0.0   \n",
       "\n",
       "Description  12 EGG HOUSE PAINTED WOOD  12 MESSAGE CARDS WITH ENVELOPES  \\\n",
       "InvoiceNo                                                                 \n",
       "536370                             0.0                              0.0   \n",
       "536852                             0.0                              0.0   \n",
       "536974                             0.0                              0.0   \n",
       "537065                             0.0                              0.0   \n",
       "537463                             0.0                              0.0   \n",
       "\n",
       "Description  12 PENCIL SMALL TUBE WOODLAND  \\\n",
       "InvoiceNo                                    \n",
       "536370                                 0.0   \n",
       "536852                                 0.0   \n",
       "536974                                 0.0   \n",
       "537065                                 0.0   \n",
       "537463                                 0.0   \n",
       "\n",
       "Description  12 PENCILS SMALL TUBE RED RETROSPOT  12 PENCILS SMALL TUBE SKULL  \\\n",
       "InvoiceNo                                                                       \n",
       "536370                                       0.0                          0.0   \n",
       "536852                                       0.0                          0.0   \n",
       "536974                                       0.0                          0.0   \n",
       "537065                                       0.0                          0.0   \n",
       "537463                                       0.0                          0.0   \n",
       "\n",
       "Description  12 PENCILS TALL TUBE POSY  12 PENCILS TALL TUBE RED RETROSPOT  \\\n",
       "InvoiceNo                                                                    \n",
       "536370                             0.0                                 0.0   \n",
       "536852                             0.0                                 0.0   \n",
       "536974                             0.0                                 0.0   \n",
       "537065                             0.0                                 0.0   \n",
       "537463                             0.0                                 0.0   \n",
       "\n",
       "Description  12 PENCILS TALL TUBE WOODLAND  ...  WRAP VINTAGE PETALS  DESIGN  \\\n",
       "InvoiceNo                                   ...                                \n",
       "536370                                 0.0  ...                          0.0   \n",
       "536852                                 0.0  ...                          0.0   \n",
       "536974                                 0.0  ...                          0.0   \n",
       "537065                                 0.0  ...                          0.0   \n",
       "537463                                 0.0  ...                          0.0   \n",
       "\n",
       "Description  YELLOW COAT RACK PARIS FASHION  YELLOW GIANT GARDEN THERMOMETER  \\\n",
       "InvoiceNo                                                                      \n",
       "536370                                  0.0                              0.0   \n",
       "536852                                  0.0                              0.0   \n",
       "536974                                  0.0                              0.0   \n",
       "537065                                  0.0                              0.0   \n",
       "537463                                  0.0                              0.0   \n",
       "\n",
       "Description  YELLOW SHARK HELICOPTER  ZINC  STAR T-LIGHT HOLDER  \\\n",
       "InvoiceNo                                                         \n",
       "536370                           0.0                        0.0   \n",
       "536852                           0.0                        0.0   \n",
       "536974                           0.0                        0.0   \n",
       "537065                           0.0                        0.0   \n",
       "537463                           0.0                        0.0   \n",
       "\n",
       "Description  ZINC FOLKART SLEIGH BELLS  ZINC HERB GARDEN CONTAINER  \\\n",
       "InvoiceNo                                                            \n",
       "536370                             0.0                         0.0   \n",
       "536852                             0.0                         0.0   \n",
       "536974                             0.0                         0.0   \n",
       "537065                             0.0                         0.0   \n",
       "537463                             0.0                         0.0   \n",
       "\n",
       "Description  ZINC METAL HEART DECORATION  ZINC T-LIGHT HOLDER STAR LARGE  \\\n",
       "InvoiceNo                                                                  \n",
       "536370                               0.0                             0.0   \n",
       "536852                               0.0                             0.0   \n",
       "536974                               0.0                             0.0   \n",
       "537065                               0.0                             0.0   \n",
       "537463                               0.0                             0.0   \n",
       "\n",
       "Description  ZINC T-LIGHT HOLDER STARS SMALL  \n",
       "InvoiceNo                                     \n",
       "536370                                   0.0  \n",
       "536852                                   0.0  \n",
       "536974                                   0.0  \n",
       "537065                                   0.0  \n",
       "537463                                   0.0  \n",
       "\n",
       "[5 rows x 1563 columns]"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "basket_Germany.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binarise the basket: 1 where the summed Quantity is positive, 0 otherwise.\n",
    "# A vectorised comparison replaces the per-cell Python lambda (applymap); missing values\n",
    "# compare as False and therefore encode as 0.\n",
    "basket_encoded = (basket_Germany > 0).astype('int64')\n",
    "basket_Germany = basket_encoded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frequent itemsets with support of at least 0.05, labelled by column name.\n",
    "frq_items = apriori(\n",
    "    basket_Germany,\n",
    "    min_support=0.05,\n",
    "    use_colnames=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive association rules from the frequent itemsets, using confidence as the metric\n",
    "# with a threshold of 0.1.\n",
    "rules = association_rules(\n",
    "    frq_items,\n",
    "    metric=\"confidence\",\n",
    "    min_threshold=0.1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order the rules by confidence, then lift, both descending.\n",
    "rules = rules.sort_values(by=['confidence', 'lift'], ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>antecedents</th>\n",
       "      <th>consequents</th>\n",
       "      <th>antecedent support</th>\n",
       "      <th>consequent support</th>\n",
       "      <th>support</th>\n",
       "      <th>confidence</th>\n",
       "      <th>lift</th>\n",
       "      <th>leverage</th>\n",
       "      <th>conviction</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>(JUMBO BAG WOODLAND ANIMALS)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.076531</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.076531</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.306667</td>\n",
       "      <td>0.017961</td>\n",
       "      <td>inf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230</th>\n",
       "      <td>(RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.051020</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.051020</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.306667</td>\n",
       "      <td>0.011974</td>\n",
       "      <td>inf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>(PLASTERS IN TIN WOODLAND ANIMALS, RED TOADSTO...</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.053571</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.053571</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.306667</td>\n",
       "      <td>0.012573</td>\n",
       "      <td>inf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>268</th>\n",
       "      <td>(SET/6 RED SPOTTY PAPER CUPS, SET/20 RED RETRO...</td>\n",
       "      <td>(SET/6 RED SPOTTY PAPER PLATES)</td>\n",
       "      <td>0.102041</td>\n",
       "      <td>0.127551</td>\n",
       "      <td>0.099490</td>\n",
       "      <td>0.975000</td>\n",
       "      <td>7.644000</td>\n",
       "      <td>0.086474</td>\n",
       "      <td>34.897959</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269</th>\n",
       "      <td>(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...</td>\n",
       "      <td>(SET/6 RED SPOTTY PAPER CUPS)</td>\n",
       "      <td>0.102041</td>\n",
       "      <td>0.137755</td>\n",
       "      <td>0.099490</td>\n",
       "      <td>0.975000</td>\n",
       "      <td>7.077778</td>\n",
       "      <td>0.085433</td>\n",
       "      <td>34.489796</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>300</th>\n",
       "      <td>(SET/6 RED SPOTTY PAPER CUPS, POSTAGE, SET/20 ...</td>\n",
       "      <td>(SET/6 RED SPOTTY PAPER PLATES)</td>\n",
       "      <td>0.084184</td>\n",
       "      <td>0.127551</td>\n",
       "      <td>0.081633</td>\n",
       "      <td>0.969697</td>\n",
       "      <td>7.602424</td>\n",
       "      <td>0.070895</td>\n",
       "      <td>28.790816</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>302</th>\n",
       "      <td>(SET/20 RED RETROSPOT PAPER NAPKINS, POSTAGE, ...</td>\n",
       "      <td>(SET/6 RED SPOTTY PAPER CUPS)</td>\n",
       "      <td>0.084184</td>\n",
       "      <td>0.137755</td>\n",
       "      <td>0.081633</td>\n",
       "      <td>0.969697</td>\n",
       "      <td>7.039282</td>\n",
       "      <td>0.070036</td>\n",
       "      <td>28.454082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>(RED RETROSPOT PICNIC BAG)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.068878</td>\n",
       "      <td>0.964286</td>\n",
       "      <td>1.260000</td>\n",
       "      <td>0.014213</td>\n",
       "      <td>6.571429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>(SET OF 9 BLACK SKULL BALLOONS)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.066327</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.063776</td>\n",
       "      <td>0.961538</td>\n",
       "      <td>1.256410</td>\n",
       "      <td>0.013015</td>\n",
       "      <td>6.102041</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>(SET/6 RED SPOTTY PAPER PLATES)</td>\n",
       "      <td>(SET/6 RED SPOTTY PAPER CUPS)</td>\n",
       "      <td>0.127551</td>\n",
       "      <td>0.137755</td>\n",
       "      <td>0.122449</td>\n",
       "      <td>0.960000</td>\n",
       "      <td>6.968889</td>\n",
       "      <td>0.104878</td>\n",
       "      <td>21.556122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>(PACK OF 6 SKULL PAPER CUPS)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.063776</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.061224</td>\n",
       "      <td>0.960000</td>\n",
       "      <td>1.254400</td>\n",
       "      <td>0.012417</td>\n",
       "      <td>5.867347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119</th>\n",
       "      <td>(RETROSPOT PARTY BAG + STICKER SET)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.061224</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.058673</td>\n",
       "      <td>0.958333</td>\n",
       "      <td>1.252222</td>\n",
       "      <td>0.011818</td>\n",
       "      <td>5.632653</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>(GUMBALL COAT RACK)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.058673</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.056122</td>\n",
       "      <td>0.956522</td>\n",
       "      <td>1.249855</td>\n",
       "      <td>0.011219</td>\n",
       "      <td>5.397959</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>(PACK OF 6 SKULL PAPER PLATES)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.056122</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.053571</td>\n",
       "      <td>0.954545</td>\n",
       "      <td>1.247273</td>\n",
       "      <td>0.010621</td>\n",
       "      <td>5.163265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>263</th>\n",
       "      <td>(POSTAGE, SET/6 RED SPOTTY PAPER PLATES)</td>\n",
       "      <td>(SET/6 RED SPOTTY PAPER CUPS)</td>\n",
       "      <td>0.107143</td>\n",
       "      <td>0.137755</td>\n",
       "      <td>0.102041</td>\n",
       "      <td>0.952381</td>\n",
       "      <td>6.913580</td>\n",
       "      <td>0.087281</td>\n",
       "      <td>18.107143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>(JAM MAKING SET PRINTED)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.053571</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.051020</td>\n",
       "      <td>0.952381</td>\n",
       "      <td>1.244444</td>\n",
       "      <td>0.010022</td>\n",
       "      <td>4.928571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>(TEA PARTY BIRTHDAY CARD)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.094388</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.089286</td>\n",
       "      <td>0.945946</td>\n",
       "      <td>1.236036</td>\n",
       "      <td>0.017050</td>\n",
       "      <td>4.341837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>(STRAWBERRY LUNCH BOX WITH CUTLERY)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.122449</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.114796</td>\n",
       "      <td>0.937500</td>\n",
       "      <td>1.225000</td>\n",
       "      <td>0.021085</td>\n",
       "      <td>3.755102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>(ROUND SNACK BOXES SET OF4 WOODLAND)</td>\n",
       "      <td>(POSTAGE)</td>\n",
       "      <td>0.158163</td>\n",
       "      <td>0.765306</td>\n",
       "      <td>0.147959</td>\n",
       "      <td>0.935484</td>\n",
       "      <td>1.222366</td>\n",
       "      <td>0.026916</td>\n",
       "      <td>3.637755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>(CHILDRENS CUTLERY SPACEBOY)</td>\n",
       "      <td>(CHILDRENS CUTLERY DOLLY GIRL)</td>\n",
       "      <td>0.068878</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>0.063776</td>\n",
       "      <td>0.925926</td>\n",
       "      <td>12.962963</td>\n",
       "      <td>0.058856</td>\n",
       "      <td>12.535714</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           antecedents  \\\n",
       "35                        (JUMBO BAG WOODLAND ANIMALS)   \n",
       "230  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   \n",
       "240  (PLASTERS IN TIN WOODLAND ANIMALS, RED TOADSTO...   \n",
       "268  (SET/6 RED SPOTTY PAPER CUPS, SET/20 RED RETRO...   \n",
       "269  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   \n",
       "300  (SET/6 RED SPOTTY PAPER CUPS, POSTAGE, SET/20 ...   \n",
       "302  (SET/20 RED RETROSPOT PAPER NAPKINS, POSTAGE, ...   \n",
       "114                         (RED RETROSPOT PICNIC BAG)   \n",
       "126                    (SET OF 9 BLACK SKULL BALLOONS)   \n",
       "154                    (SET/6 RED SPOTTY PAPER PLATES)   \n",
       "78                        (PACK OF 6 SKULL PAPER CUPS)   \n",
       "119                (RETROSPOT PARTY BAG + STICKER SET)   \n",
       "28                                 (GUMBALL COAT RACK)   \n",
       "79                      (PACK OF 6 SKULL PAPER PLATES)   \n",
       "263           (POSTAGE, SET/6 RED SPOTTY PAPER PLATES)   \n",
       "29                            (JAM MAKING SET PRINTED)   \n",
       "141                          (TEA PARTY BIRTHDAY CARD)   \n",
       "138                (STRAWBERRY LUNCH BOX WITH CUTLERY)   \n",
       "124               (ROUND SNACK BOXES SET OF4 WOODLAND)   \n",
       "20                        (CHILDRENS CUTLERY SPACEBOY)   \n",
       "\n",
       "                         consequents  antecedent support  consequent support  \\\n",
       "35                         (POSTAGE)            0.076531            0.765306   \n",
       "230                        (POSTAGE)            0.051020            0.765306   \n",
       "240                        (POSTAGE)            0.053571            0.765306   \n",
       "268  (SET/6 RED SPOTTY PAPER PLATES)            0.102041            0.127551   \n",
       "269    (SET/6 RED SPOTTY PAPER CUPS)            0.102041            0.137755   \n",
       "300  (SET/6 RED SPOTTY PAPER PLATES)            0.084184            0.127551   \n",
       "302    (SET/6 RED SPOTTY PAPER CUPS)            0.084184            0.137755   \n",
       "114                        (POSTAGE)            0.071429            0.765306   \n",
       "126                        (POSTAGE)            0.066327            0.765306   \n",
       "154    (SET/6 RED SPOTTY PAPER CUPS)            0.127551            0.137755   \n",
       "78                         (POSTAGE)            0.063776            0.765306   \n",
       "119                        (POSTAGE)            0.061224            0.765306   \n",
       "28                         (POSTAGE)            0.058673            0.765306   \n",
       "79                         (POSTAGE)            0.056122            0.765306   \n",
       "263    (SET/6 RED SPOTTY PAPER CUPS)            0.107143            0.137755   \n",
       "29                         (POSTAGE)            0.053571            0.765306   \n",
       "141                        (POSTAGE)            0.094388            0.765306   \n",
       "138                        (POSTAGE)            0.122449            0.765306   \n",
       "124                        (POSTAGE)            0.158163            0.765306   \n",
       "20    (CHILDRENS CUTLERY DOLLY GIRL)            0.068878            0.071429   \n",
       "\n",
       "      support  confidence       lift  leverage  conviction  \n",
       "35   0.076531    1.000000   1.306667  0.017961         inf  \n",
       "230  0.051020    1.000000   1.306667  0.011974         inf  \n",
       "240  0.053571    1.000000   1.306667  0.012573         inf  \n",
       "268  0.099490    0.975000   7.644000  0.086474   34.897959  \n",
       "269  0.099490    0.975000   7.077778  0.085433   34.489796  \n",
       "300  0.081633    0.969697   7.602424  0.070895   28.790816  \n",
       "302  0.081633    0.969697   7.039282  0.070036   28.454082  \n",
       "114  0.068878    0.964286   1.260000  0.014213    6.571429  \n",
       "126  0.063776    0.961538   1.256410  0.013015    6.102041  \n",
       "154  0.122449    0.960000   6.968889  0.104878   21.556122  \n",
       "78   0.061224    0.960000   1.254400  0.012417    5.867347  \n",
       "119  0.058673    0.958333   1.252222  0.011818    5.632653  \n",
       "28   0.056122    0.956522   1.249855  0.011219    5.397959  \n",
       "79   0.053571    0.954545   1.247273  0.010621    5.163265  \n",
       "263  0.102041    0.952381   6.913580  0.087281   18.107143  \n",
       "29   0.051020    0.952381   1.244444  0.010022    4.928571  \n",
       "141  0.089286    0.945946   1.236036  0.017050    4.341837  \n",
       "138  0.114796    0.937500   1.225000  0.021085    3.755102  \n",
       "124  0.147959    0.935484   1.222366  0.026916    3.637755  \n",
       "20   0.063776    0.925926  12.962963  0.058856   12.535714  "
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store a plain-text copy of each rule's antecedent itemset in a 'name' column\n",
    "# so it can be searched with pandas string methods. The text is the frozenset\n",
    "# repr, e.g. \"frozenset({'SOME ITEM'})\".\n",
    "rules['name'] = rules['antecedents'].astype('str')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>antecedents</th>\n",
       "      <th>consequents</th>\n",
       "      <th>antecedent support</th>\n",
       "      <th>consequent support</th>\n",
       "      <th>support</th>\n",
       "      <th>confidence</th>\n",
       "      <th>lift</th>\n",
       "      <th>leverage</th>\n",
       "      <th>conviction</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>(CHILDRENS CUTLERY SPACEBOY)</td>\n",
       "      <td>(CHILDRENS CUTLERY DOLLY GIRL)</td>\n",
       "      <td>0.068878</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>0.063776</td>\n",
       "      <td>0.925926</td>\n",
       "      <td>12.962963</td>\n",
       "      <td>0.058856</td>\n",
       "      <td>12.535714</td>\n",
       "      <td>frozenset({'CHILDRENS CUTLERY SPACEBOY'})</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     antecedents                     consequents  \\\n",
       "20  (CHILDRENS CUTLERY SPACEBOY)  (CHILDRENS CUTLERY DOLLY GIRL)   \n",
       "\n",
       "    antecedent support  consequent support   support  confidence       lift  \\\n",
       "20            0.068878            0.071429  0.063776    0.925926  12.962963   \n",
       "\n",
       "    leverage  conviction                                       name  \n",
       "20  0.058856   12.535714  frozenset({'CHILDRENS CUTLERY SPACEBOY'})  "
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the rules whose antecedent text (the 'name' column) mentions this product.\n",
    "# regex=False makes this a literal substring test: product descriptions in this\n",
    "# data can contain regex metacharacters (e.g. 'RETROSPOT PARTY BAG + STICKER SET'),\n",
    "# which the default regex=True would misinterpret if the search term were changed.\n",
    "# Note: this is a substring match, so any item whose description merely contains\n",
    "# the search text is matched as well.\n",
    "rules[rules['name'].str.contains('CHILDRENS CUTLERY SPACEBOY', regex=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
